# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
import traceback
import re

class DbbooksPipeline(object):
    """Scrapy item pipeline that inserts scraped book items into MySQL.

    Each item is converted into one row of the ``books`` table. Text fields
    are stored as scraped (the title is stripped of surrounding whitespace);
    ``price``, ``star`` and ``comment_num`` are parsed into numbers, falling
    back to ``0`` when no number can be extracted.
    """

    # Item keys this pipeline understands; anything else is reported.
    _KNOWN_FIELDS = frozenset((
        'title', 'link', 'author', 'press',
        'publish_date', 'price', 'star', 'comment_num',
    ))

    # Placeholders are filled in by the driver, which escapes every value.
    # This keeps quotes in scraped text from breaking (or injecting into)
    # the statement.
    _INSERT_SQL = (
        "insert ignore into books"
        "(title,press,star,publish_date,price,author,link,comment_num) "
        "values (%s,%s,%s,%s,%s,%s,%s,%s)"
    )

    def __init__(self, host='localhost', user='root', passwd='123456',
                 db='douban', charset='utf8'):
        """Open the MySQL connection used for all inserts.

        The defaults reproduce the connection settings that were previously
        hard-coded, so constructing the pipeline without arguments behaves
        as before.
        """
        self.conn = pymysql.connect(host=host, user=user, passwd=passwd,
                                    db=db, charset=charset)

    @staticmethod
    def _text(value):
        """Return *value* unchanged, or an empty string when it is None."""
        return '' if value is None else value

    @staticmethod
    def _parse_price(raw):
        """Return the price in *raw* as a float, or 0.0 if it has none.

        Every character other than digits and dots is discarded before
        conversion, so currency symbols and unit suffixes are ignored.
        NOTE: the currency itself is not recorded, and some books are not
        priced in RMB.
        """
        if isinstance(raw, (int, float)):
            return float(raw)
        try:
            return float(re.sub(r'[^0-9.]', '', raw))
        except (TypeError, ValueError):
            # Not text at all, or nothing numeric left after cleaning.
            return 0.0

    @staticmethod
    def _parse_star(raw):
        """Return the rating in *raw* as a float, or 0.0 if it has none.

        TODO: an unrated book is stored as 0.0; NULL and 0 should arguably
        be distinguishable.
        """
        if isinstance(raw, (int, float)):
            return float(raw)
        try:
            number = re.search(r'\d+(?:\.\d+)?', raw)
        except TypeError:
            return 0.0
        if number is None:
            # No digit at all: the book has no rating.
            return 0.0
        try:
            return float(raw)
        except ValueError:
            # Stray characters around the value (the scraped rating is often
            # preceded by a comma): keep just the number.
            return float(number.group())

    @staticmethod
    def _parse_comment_num(raw):
        """Return the comment count in *raw* as an int, or 0 if it has none."""
        if isinstance(raw, (int, float)):
            return int(raw)
        try:
            number = re.search(r'\d+', raw)
        except TypeError:
            return 0
        if number is None:
            return 0
        try:
            return int(raw)
        except ValueError:
            # Digits embedded in other text: keep the first run of digits.
            return int(number.group())

    @classmethod
    def _build_row(cls, item):
        """Return the tuple of column values for *item*.

        The order matches the column list in ``_INSERT_SQL``. Missing fields
        default to an empty string (text) or zero (numbers).
        """
        for key in item.keys():
            if key not in cls._KNOWN_FIELDS:
                print('no such key')
        return (
            cls._text(item.get('title')).strip(),
            cls._text(item.get('press')),
            cls._parse_star(item.get('star')),
            cls._text(item.get('publish_date')),
            cls._parse_price(item.get('price')),
            cls._text(item.get('author')),
            cls._text(item.get('link')),
            cls._parse_comment_num(item.get('comment_num')),
        )

    def process_item(self, item, spider):
        """Insert *item* into the ``books`` table and return it.

        Storage is best-effort: if building or executing the insert fails,
        the traceback and the offending item are printed and the item is
        still returned, so later pipeline stages receive the item rather
        than ``None``.
        """
        try:
            row = self._build_row(item)
            with self.conn.cursor() as cursor:
                cursor.execute(self._INSERT_SQL, row)
            self.conn.commit()
        except Exception:
            traceback.print_exc()
            print("error item:" + str(item))
        return item

    def close_spider(self, spider):
        """Close the database connection when the spider finishes."""
        self.close_conn()

    def __del__(self):
        # Fallback cleanup; close_conn tolerates a missing or closed connection.
        self.close_conn()

    def close_conn(self):
        """Close the database connection; safe to call more than once."""
        conn = getattr(self, 'conn', None)
        if conn is None:
            return
        # Clear the reference first so a second call is a no-op even if
        # close() raises.
        self.conn = None
        conn.close()
            
